We study data on Mongolia and Japan.

WDI

# Embed the linked web page in the rendered document.
knitr::include_url(
  url = "https://icu-hsuzuki.github.io/science/index-j.html"
)

Setup

# Set the LANG environment variable to "en" for this session
# (presumably so that R's messages and warnings are shown in English).
Sys.setenv(LANG = "en")
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0      ✔ purrr   0.3.5 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(WDI)

Education

Education in Japan

# Download the CSV from the gender.go.jp white paper site and import it.
url_edu1 <- "https://www.gender.go.jp/about_danjo/whitepaper/r03/zentai/html/honpen/csv/zuhyo01-05-01.csv"
path_edu1 <- file.path("data", "edu1.csv")
# download.file() does not create missing directories, so make sure the
# target folder exists before writing into it.
dir.create(dirname(path_edu1), showWarnings = FALSE, recursive = TRUE)
# mode = "wb" stores the bytes exactly as served; the default text mode can
# rewrite line endings on Windows.
download.file(url_edu1, destfile = path_edu1, mode = "wb")
# Show readr's encoding candidates for the downloaded file.
guess_encoding(path_edu1)
# The file is read as Shift-JIS; the first two lines are skipped
# (presumably title lines above the header row -- confirm against the file).
df_edu1 <- read_csv(
  path_edu1,
  locale = locale(encoding = "Shift-JIS"),
  skip = 2
)
## Rows: 71 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): 年度
## dbl (9): 高等学校等(男子), 高等学校等(女子), 専修学校(専門課程,男子), 専修学校(専門課程,女子), 大学(学部,男子), 大学(学...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print the imported table, then list its column names
# (the original Japanese headers of the CSV).
df_edu1
colnames(df_edu1)
##  [1] "年度"                       "高等学校等(男子)"        
##  [3] "高等学校等(女子)"         "専修学校(専門課程,男子)"
##  [5] "専修学校(専門課程,女子)" "大学(学部,男子)"        
##  [7] "大学(学部,女子)"         "短期大学(本科,女子)"    
##  [9] "大学院(男子)"             "大学院(女子)"
# Inspect the year column: the labels are character strings in Japanese era
# notation (Showa 25 onward, then Heisei, then Reiwa), not calendar years.
df_edu1$年度
##  [1] "昭和25" "26"     "27"     "28"     "29"     "30"     "31"     "32"    
##  [9] "33"     "34"     "35"     "36"     "37"     "38"     "39"     "40"    
## [17] "41"     "42"     "43"     "44"     "45"     "46"     "47"     "48"    
## [25] "49"     "50"     "51"     "52"     "53"     "54"     "55"     "56"    
## [33] "57"     "58"     "59"     "60"     "61"     "62"     "63"     "平成元"
## [41] "2"      "3"      "4"      "5"      "6"      "7"      "8"      "9"     
## [49] "10"     "11"     "12"     "13"     "14"     "15"     "16"     "17"    
## [57] "18"     "19"     "20"     "21"     "22"     "23"     "24"     "25"    
## [65] "26"     "27"     "28"     "29"     "30"     "令和元" "2"
# Work on a copy with ASCII column names: one column per school type and sex
# (_m = male, _f = female), in the same order as the original headers.
df_edu0 <- df_edu1
colnames(df_edu0) <- c(
  "year",
  "highschool_m", "highschool_f",
  "vocational_m", "vocational_f",
  "university_m", "university_f",
  "juniorcol_f",
  "gradschool_m", "gradschool_f"
)

# Replace the era-based labels with calendar years and add one combined series
# per school type.
# - The first row is Showa 25 (1950) and the rows are consecutive years, so the
#   years are derived from the row count instead of a hard-coded 1950:2020,
#   which only fits a table of exactly 71 rows.
# - Each combined series is the unweighted mean of the male and female columns;
#   junior college has a female column only, so it is copied as is.
df_edu00 <- df_edu0 %>%
  mutate(
    year = 1950L + seq_len(n()) - 1L,
    highschool = (highschool_m + highschool_f) / 2,
    vocational = (vocational_m + vocational_f) / 2,
    university = (university_m + university_f) / 2,
    juniorcol = juniorcol_f,
    gradschool = (gradschool_m + gradschool_f) / 2
  )
# Keep the year and the five combined series, reshape to long form
# (one row per year and school type), and draw one line per school type.
df_edu00 %>%
  select(year, highschool, vocational, university, juniorcol, gradschool) %>%
  pivot_longer(
    cols = -year,
    names_to = "schools",
    values_to = "percentage"
  ) %>%
  ggplot(mapping = aes(x = year, y = percentage, colour = schools)) +
  geom_line()
## Warning: Removed 49 rows containing missing values (`geom_line()`).

# Japanese-labelled version of the combined table: the calendar year plus, per
# school type, the mean of the male and female columns (junior college has a
# female column only). Only the six newly created columns are kept.
df_edu11 <- df_edu1 %>%
  mutate(
    年 = 1950:2020,
    高等学校 = (`高等学校等(男子)` + `高等学校等(女子)`) / 2,
    専修学校 = (`専修学校(専門課程,男子)` + `専修学校(専門課程,女子)`) / 2,
    大学 = (`大学(学部,男子)` + `大学(学部,女子)`) / 2,
    短期大学 = `短期大学(本科,女子)`,
    大学院 = (`大学院(男子)` + `大学院(女子)`) / 2
  ) %>%
  select(年, 高等学校, 専修学校, 大学, 短期大学, 大学院)
# Print the result.
df_edu11
# Reshape every column except the year to long form and draw one line per
# school type, using the default theme font.
df_edu11 %>%
  pivot_longer(
    cols = -年,
    names_to = "学校",
    values_to = "進学率"
  ) %>%
  ggplot(mapping = aes(x = 年, y = 進学率, colour = 学校)) +
  geom_line()
# Same plot with an explicit font family for the theme text, so the Japanese
# labels can be rendered.
# NOTE(review): Hiragino fonts are macOS system fonts -- on other platforms
# substitute an installed Japanese font family.
df_edu11 %>%
  pivot_longer(
    cols = -年,
    names_to = "学校",
    values_to = "進学率"
  ) %>%
  ggplot(mapping = aes(x = 年, y = 進学率, colour = 学校)) +
  geom_line() +
  theme_gray(base_family = "HiraginoSans-W3") # or base_family = "HiraKakuPro-W3"
## Warning: Removed 49 rows containing missing values (`geom_line()`).

# Plot all nine original series (school type by sex) without averaging:
# add a calendar-year column, reshape everything except the two year columns
# to long form, and draw one line per original column.
df_edu1 %>%
  mutate(year = 1950:2020) %>%
  pivot_longer(
    cols = -c(年度, year),
    names_to = "学校",
    values_to = "進学率"
  ) %>%
  ggplot(mapping = aes(x = year, y = 進学率, colour = 学校)) +
  geom_line() +
  theme_gray(base_family = "HiraKakuPro-W3") # or base_family = "HiraginoSans-W3"
## Warning: Removed 94 rows containing missing values (`geom_line()`).

#{r dev='ragg_png'} df_edu1 %>% mutate(year = 1950:2020) %>% pivot_longer(2:10, names_to = "学校", values_to = "進学率") %>% ggplot(aes(x = year, y = 進学率, color = 学校)) + geom_line()